
# --------------------------------------------------------------------------------
# setup

# install and load packages
require(pacman)
pacman::p_load(likert, lme4, janitor, tidyverse)
               
# global options
options(scipen = 20)

# path to working directory (named "LP8")
base_path <- "~/Desktop/LP8"

# read and clean data for descriptives
read_csv(file.path(base_path, "lp_data.csv")) %>%
  mutate(id = factor(paste0("id", 1:n())),
         respondent_gender = factor(respondent_gender, levels = 0:1, labels = c("Male", "Female")),
	     vignettes_gender = factor(vignettes_gender, levels = 0:1, labels = c("Male", "Female")),
	     respondent_department_major = factor(respondent_department_major, levels = 0:1, labels = c("Non-Stem", "Stem"))) ->
dat
glimpse(dat)

# reshape and clean data for model inference
dat %>%
  pivot_longer(cols = contains("score"), names_to = "vignette", values_to = "score") %>%
  mutate(vignette = factor(gsub("(\\w{2}).+", "\\1", vignette)),
	     score_fac = factor(ifelse(score >= 4, 1, 0))) ->
dat_long
glimpse(dat_long)


# --------------------------------------------------------------------------------
# compute descriptive counts

# compute total and percent female respondents
dat %>% tabyl(respondent_gender)

# total and percent respondents who belonged to a STEM field
dat %>% tabyl(respondent_department_major)


# --------------------------------------------------------------------------------
# Vignette versions

# Question 1: Student suffering from imposter syndrome to make it in physics
# Both "matching" and "leaking" mindset should encourage both genders

# Question 2: Student has lost interest in physics
# "Matching" mindset should discourage both genders, "leaking" mindset should encourage females

# Question 3: Student does not think physics PhD worth it because grad schools into which student was admitted known for failing to produce professors
# "Matching" mindset should discourage both genders, "leaking" mindset should encourage females


# --------------------------------------------------------------------------------
# Data labels

# q1score: Score response for Question 1 ranging from 1 (strongly discourage) to 6 (strongly encourage)
# q2score: Score response for Question 2 ranging from 1 (strongly discourage) to 6 (strongly encourage)
# q3score: Score response for Question 3 ranging from 1 (strongly discourage) to 6 (strongly encourage)
# vignettes_gender: Whether hypothetical physics students in the vignettes randomly assigned to respondent were male or female
# repondent_gender: Whether respondent was male or female
# repondent_department_major: Whether respondent had a major in a male-dominated STEM field or not

	
# --------------------------------------------------------------------------------
# Figure 1

# Purpose: visualize extent to which respondents encourage vs. discourage in response to each vignette, contrasting male and female hypothetical students.

Q_scale <- c("strongly discourage", "moderately discourage", "slightly discourage",
             "slightly encourage", "moderately encourage", "strongly encourage")
             
dat %>%
  #select(q1score, q2score, q3score,) %>%
  mutate_at(vars(all_of(c("q1score", "q2score", "q3score"))),
    list(~ case_when(
    . == 1 ~ "strongly discourage",
    . == 2 ~ "moderately discourage",
    . == 3 ~ "slightly discourage",
    . == 4 ~ "slightly encourage",
    . == 5 ~ "moderately encourage",
    . == 6 ~ "strongly encourage"))) %>%
  mutate_at(vars(all_of(c("q1score", "q2score", "q3score"))),
    list(~ factor(., levels = Q_scale, ordered = TRUE))) %>%
  rename("Imposter syndrome" = "q1score", 
         "Loss of interest" = "q2score", 
         "Conflicting personal goals" = "q3score") %>%
  mutate(vignettes_gender = factor(vignettes_gender, levels = c("Female", "Male"), labels = c("female student", "male student")),
         respondent_gender = factor(respondent_gender, levels = c("Female", "Male"), labels = c("female respondent", "male respondent"))) ->
dat_likert

f1_likert <- dat_likert %>% select(`Imposter syndrome`, `Loss of interest`, `Conflicting personal goals`) %>% 
               as.data.frame() %>%
               likert(grouping = dat_likert$vignettes_gender) 

cols <- c("red4", "red3", "red", "royalblue1", "royalblue3", "royalblue4")

f1 <- plot(f1_likert, type="bar", col=cols, ordered=TRUE, text.size=1.5) + 
  ylab("Percentage of respondents") +
  theme_classic() + 
  guides(fill = guide_legend(ncol = 1)) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_text(size=8),
        strip.text = element_text(size=8),
        axis.text = element_text(size=8),
        axis.title.x = element_text(size=8))
ggsave(f1, file = file.path(base_path, "Fig_1.pdf"), height = 6, width = 3.46457)


# --------------------------------------------------------------------------------
# Figure 2

# Purpose: visualize higher odds of female professors encouraging
 
f2_likert <- dat_likert %>% select(`Imposter syndrome`, `Loss of interest`, `Conflicting personal goals`) %>% 
               as.data.frame() %>%
               likert(grouping = dat_likert$respondent_gender)

f2 <- plot(f2_likert, type="bar", col=cols, ordered=TRUE, text.size=1.5) + 
  ylab("Percentage of respondents") +
  theme_classic() + 
  guides(fill = guide_legend(ncol = 1)) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        legend.title = element_text(size=8),
        strip.text = element_text(size=8),
        axis.text = element_text(size=8),
        axis.title.x = element_text(size=8))
ggsave(f2, file = file.path(base_path, "Fig_2.pdf"), height = 6, width = 3.46457)


# --------------------------------------------------------------------------------
# model-based inference

# ----------------------------------------
# Hypothesis 1: Females are encouraged more even while accounting for varied reasons to leave STEM
# Hypothesis 2: Respondent's gender influences encouragement

# mixed effects logistic regression target model
mod1 <- glmer(score_fac ~ vignette + vignettes_gender + respondent_department_major + respondent_gender + (1 | id),
              data = dat_long, 
              family = binomial(link="logit"),
              control = glmerControl(optimizer = "bobyqa"), 
              nAGQ = 10)

# results to put into LaTeX table
summary(mod1)
exp(fixef(mod1))
exp(confint(mod1))

# mixed effects logistic regression null model
mod_null <- glmer(score_fac ~ 1 + (1 | id), 
                  data = dat_long, 
                  family = binomial(link="logit"),
                  control = glmerControl(optimizer = "bobyqa"), 
                  nAGQ = 10)

# likelihood ratio test to see if model is improvement over null                  
anova(mod_null, mod1)


# ----------------------------------------
# Hypothesis 3: Females are encouraged more irrespective of reason for leaving STEM, more so for respondents in STEM

# mixed effects logistic regression target model
mod2 <- glmer(score_fac ~ vignette + vignettes_gender * respondent_department_major + respondent_gender + (1 | id),
              data = dat_long, 
              family = binomial(link="logit"),
              control = glmerControl(optimizer = "bobyqa"), 
              nAGQ = 10)

# results to put into LaTeX table
summary(mod2)
exp(fixef(mod2))
exp(confint(mod2))

# likelihood ratio test to see if target model is improvement over null
anova(mod_null, mod2)

# likelihood ratio test to see if including interaction term improves the model
anova(mod1, mod2)

